import graphlab
//anaconda/envs/py27/lib/python2.7/site-packages/graphlab/util/metric_tracker.py:12: UserWarning: The official Mixpanel Python library has moved from mixpanel-py to mixpanel. Updates will be published only to <https://pypi.python.org/pypi/mixpanel>. import mixpanel
# Load the song listening records from the local 'song_data.gl/' SFrame directory.
song_data = graphlab.SFrame('song_data.gl/')
[INFO] This non-commercial license of GraphLab Create is assigned to dbercz@gmail.com and will expire on January 19, 2017. For commercial licensing options, visit https://dato.com/buy/. [INFO] Start server at: ipc:///tmp/graphlab_server-3039 - Server binary: /anaconda/envs/py27/lib/python2.7/site-packages/graphlab/unity_server - Server log: /tmp/graphlab_server_1455309968.log [INFO] GraphLab Server Version: 1.8.1
# Preview the first rows to check the columns
# (user_id, song_id, listen_count, title, artist, song).
song_data.head()
| user_id | song_id | listen_count | title | artist |
|---|---|---|---|---|
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SOAKIMP12A8C130995 | 1 | The Cove | Jack Johnson |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SOBBMDR12A8C13253B | 2 | Entre Dos Aguas | Paco De Lucia |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SOBXHDL12A81C204C0 | 1 | Stronger | Kanye West |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SOBYHAJ12A6701BF1D | 1 | Constellations | Jack Johnson |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SODACBL12A8C13C273 | 1 | Learn To Fly | Foo Fighters |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SODDNQT12A6D4F5F7E | 5 | Apuesta Por El Rock 'N' Roll ... |
Héroes del Silencio |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SODXRTY12AB0180F3B | 1 | Paper Gangsta | Lady GaGa |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SOFGUAY12AB017B0A8 | 1 | Stacked Actors | Foo Fighters |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SOFRQTD12A81C233C0 | 1 | Sehr kosmisch | Harmonia |
| b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SOHQWYZ12A6D4FA701 | 1 | Heaven's gonna burn your eyes ... |
Thievery Corporation feat. Emiliana Torrini ... |
| song |
|---|
| The Cove - Jack Johnson |
| Entre Dos Aguas - Paco De Lucia ... |
| Stronger - Kanye West |
| Constellations - Jack Johnson ... |
| Learn To Fly - Foo Fighters ... |
| Apuesta Por El Rock 'N' Roll - Héroes del ... |
| Paper Gangsta - Lady GaGa |
| Stacked Actors - Foo Fighters ... |
| Sehr kosmisch - Harmonia |
| Heaven's gonna burn your eyes - Thievery ... |
# Send GraphLab Canvas visualizations to the notebook ('ipynb' target).
graphlab.canvas.set_target('ipynb')
# Visualize the combined "title - artist" song column.
song_data['song'].show()
# Total number of rows (one row per user/song listening record).
len(song_data)
1116609
# Distinct user ids; `users` is indexed later to pick example users.
users = song_data['user_id'].unique()
# Number of distinct users in the data set.
len(users)
66346
# Hold out part of the data for evaluation: roughly 80% of the rows go to
# training, the rest to testing. The fixed seed keeps the split reproducible.
train_data, test_data = song_data.random_split(0.8, seed=0)

# Baseline recommender. Only the user id and the combined "song" column are
# passed in; the training log reports the remaining columns as ignored.
popularity_model = graphlab.popularity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song')
PROGRESS: Recsys training: model = popularity PROGRESS: Warning: Ignoring columns song_id, listen_count, title, artist; PROGRESS: To use one of these as a target column, set target = <column_name> PROGRESS: and use a method that allows the use of a target. PROGRESS: Preparing data set. PROGRESS: Data has 893580 observations with 66085 users and 9952 items. PROGRESS: Data prepared in: 0.867886s PROGRESS: 893580 observations to process; with 9952 unique items.
# Top recommendations from the popularity baseline for the first user in `users`.
popularity_model.recommend(users=[users[0]])
| user_id | song | score | rank |
|---|---|---|---|
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Sehr kosmisch - Harmonia | 4754.0 | 1 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Undo - Björk | 4227.0 | 2 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
You're The One - Dwight Yoakam ... |
3781.0 | 3 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Dog Days Are Over (Radio Edit) - Florence + The ... |
3633.0 | 4 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Revelry - Kings Of Leon | 3527.0 | 5 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Horn Concerto No. 4 in E flat K495: II. Romance ... |
3161.0 | 6 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Secrets - OneRepublic | 3148.0 | 7 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Hey_ Soul Sister - Train | 2538.0 | 8 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Fireflies - Charttraxx Karaoke ... |
2532.0 | 9 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Tive Sim - Cartola | 2521.0 | 10 |
# Same query for the second user in `users`; the recorded output lists the
# same songs and scores as for users[0].
popularity_model.recommend(users=[users[1]])
| user_id | song | score | rank |
|---|---|---|---|
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Sehr kosmisch - Harmonia | 4754.0 | 1 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Undo - Björk | 4227.0 | 2 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
You're The One - Dwight Yoakam ... |
3781.0 | 3 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Dog Days Are Over (Radio Edit) - Florence + The ... |
3633.0 | 4 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Revelry - Kings Of Leon | 3527.0 | 5 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Horn Concerto No. 4 in E flat K495: II. Romance ... |
3161.0 | 6 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Secrets - OneRepublic | 3148.0 | 7 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Hey_ Soul Sister - Train | 2538.0 | 8 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Fireflies - Charttraxx Karaoke ... |
2532.0 | 9 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Tive Sim - Cartola | 2521.0 | 10 |
# Item-similarity recommender trained on the same training split, again using
# only the user id and the combined "song" column as inputs.
personalized_model = graphlab.item_similarity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song')
PROGRESS: Recsys training: model = item_similarity PROGRESS: Warning: Ignoring columns song_id, listen_count, title, artist; PROGRESS: To use one of these as a target column, set target = <column_name> PROGRESS: and use a method that allows the use of a target. PROGRESS: Preparing data set. PROGRESS: Data has 893580 observations with 66085 users and 9952 items. PROGRESS: Data prepared in: 0.936565s PROGRESS: Computing item similarity statistics: PROGRESS: Computing most similar items for 9952 items: PROGRESS: +-----------------+-----------------+ PROGRESS: | Number of items | Elapsed Time | PROGRESS: +-----------------+-----------------+ PROGRESS: | 1000 | 0.415846 | PROGRESS: | 2000 | 0.451759 | PROGRESS: | 3000 | 0.487894 | PROGRESS: | 4000 | 0.53842 | PROGRESS: | 5000 | 0.577101 | PROGRESS: | 6000 | 0.611902 | PROGRESS: | 7000 | 0.64622 | PROGRESS: | 8000 | 0.686704 | PROGRESS: | 9000 | 0.738375 | PROGRESS: +-----------------+-----------------+ PROGRESS: Finished training in 1.11156s
# Top recommendations from the item-similarity model for the first user in `users`.
personalized_model.recommend(users=[users[0]])
| user_id | song | score | rank |
|---|---|---|---|
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Riot In Cell Block Number Nine - Dr Feelgood ... |
0.0375 | 1 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Sei Lá Mangueira - Elizeth Cardoso ... |
0.0331632653061 | 2 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
The Stallion - Ween | 0.0322580645161 | 3 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Rain - Subhumans | 0.0314159292035 | 4 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
West One (Shine On Me) - The Ruts ... |
0.0307080895662 | 5 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Back Against The Wall - Cage The Elephant ... |
0.0301204819277 | 6 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Life Less Frightening - Rise Against ... |
0.0284431137725 | 7 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
A Beggar On A Beach Of Gold - Mike And The ... |
0.0230024907156 | 8 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Audience Of One - Rise Against ... |
0.0193938442211 | 9 |
| 279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... |
Blame It On The Boogie - The Jacksons ... |
0.0189873417722 | 10 |
# Same query for the second user in `users`; the recorded output differs from
# the list returned for users[0].
personalized_model.recommend(users=[users[1]])
| user_id | song | score | rank |
|---|---|---|---|
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Grind With Me (Explicit Version) - Pretty Ricky ... |
0.0459424433009 | 1 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
There Goes My Baby - Usher ... |
0.0333266227603 | 2 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Panty Droppa [Intro] (Album Version) - Trey ... |
0.0318658401612 | 3 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Nobody (Featuring Athena Cage) (LP Version) - ... |
0.027853068198 | 4 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Youth Against Fascism - Sonic Youth ... |
0.0263032036922 | 5 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Nice & Slow - Usher | 0.0239837935781 | 6 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Making Love (Into The Night) - Usher ... |
0.0238530544409 | 7 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Naked - Marques Houston | 0.0228925619283 | 8 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Love Lost (Album Version) - Trey Songz ... |
0.0228536024205 | 9 |
| c067c22072a17d33310d7223d 7b79f819e48cf42 ... |
Possessed - Kruiz | 0.0228088947837 | 10 |
# Songs the item-similarity model scores as most similar to the given song.
personalized_model.get_similar_items(['With Or Without You - U2'])
PROGRESS: Getting similar items completed in 0.008224
| song | similar | score | rank |
|---|---|---|---|
| With Or Without You - U2 | I Still Haven't Found What I'm Looking For ... |
0.0430327868852 | 1 |
| With Or Without You - U2 | Hold Me_ Thrill Me_ Kiss Me_ Kill Me - U2 ... |
0.033734939759 | 2 |
| With Or Without You - U2 | Window In The Skies - U2 | 0.0328358208955 | 3 |
| With Or Without You - U2 | Vertigo - U2 | 0.0300751879699 | 4 |
| With Or Without You - U2 | Sunday Bloody Sunday - U2 | 0.0271317829457 | 5 |
| With Or Without You - U2 | Bad - U2 | 0.0251798561151 | 6 |
| With Or Without You - U2 | A Day Without Me - U2 | 0.0237154150198 | 7 |
| With Or Without You - U2 | Another Time Another Place - U2 ... |
0.020325203252 | 8 |
| With Or Without You - U2 | Walk On - U2 | 0.020202020202 | 9 |
| With Or Without You - U2 | Get On Your Boots - U2 | 0.0196850393701 | 10 |
# Same similarity lookup for a second example song.
personalized_model.get_similar_items(['Chan Chan (Live) - Buena Vista Social Club'])
PROGRESS: Getting similar items completed in 0.002774
| song | similar | score | rank |
|---|---|---|---|
| Chan Chan (Live) - Buena Vista Social Club ... |
Murmullo - Buena Vista Social Club ... |
0.188118811881 | 1 |
| Chan Chan (Live) - Buena Vista Social Club ... |
La Bayamesa - Buena Vista Social Club ... |
0.187192118227 | 2 |
| Chan Chan (Live) - Buena Vista Social Club ... |
Amor de Loca Juventud - Buena Vista Social Club ... |
0.184834123223 | 3 |
| Chan Chan (Live) - Buena Vista Social Club ... |
Diferente - Gotan Project | 0.0214592274678 | 4 |
| Chan Chan (Live) - Buena Vista Social Club ... |
Mistica - Orishas | 0.0205761316872 | 5 |
| Chan Chan (Live) - Buena Vista Social Club ... |
Hotel California - Gipsy Kings ... |
0.019305019305 | 6 |
| Chan Chan (Live) - Buena Vista Social Club ... |
Nacà Orishas - Orishas | 0.0191570881226 | 7 |
| Chan Chan (Live) - Buena Vista Social Club ... |
Le Moulin - Yann Tiersen | 0.0187969924812 | 8 |
| Chan Chan (Live) - Buena Vista Social Club ... |
Gitana - Willie Colon | 0.0187969924812 | 9 |
| Chan Chan (Live) - Buena Vista Social Club ... |
Criminal - Gotan Project | 0.018779342723 | 10 |
if graphlab.version[:3] >= "1.6":
model_performance = graphlab.compare(test_data, [popularity_model, personalized_model], user_sample=0.05)
graphlab.show_comparison(model_performance,[popularity_model, personalized_model])
else:
%matplotlib inline
model_performance = graphlab.recommender.util.compare_models(test_data, [popularity_model, personalized_model], user_sample=.05)
compare_models: using 2931 users to estimate model performance PROGRESS: Evaluate model M0 PROGRESS: recommendations finished on 1000/2931 queries. users per second: 17001.6 PROGRESS: recommendations finished on 2000/2931 queries. users per second: 22365.4 Precision and recall summary statistics by cutoff +--------+-----------------+------------------+ | cutoff | mean_precision | mean_recall | +--------+-----------------+------------------+ | 1 | 0.0255885363357 | 0.00660202699609 | | 2 | 0.0254179460935 | 0.0131865348344 | | 3 | 0.024337541226 | 0.0195730015843 | | 4 | 0.0223473217332 | 0.0237471406868 | | 5 | 0.0208802456499 | 0.0279004544869 | | 6 | 0.0195610144433 | 0.0313233195782 | | 7 | 0.0186187064386 | 0.0347225124093 | | 8 | 0.0175707949505 | 0.0380009590091 | | 9 | 0.0167936616248 | 0.0408946127984 | | 10 | 0.0159672466735 | 0.0431541003445 | +--------+-----------------+------------------+ [10 rows x 3 columns] PROGRESS: Evaluate model M1 PROGRESS: recommendations finished on 1000/2931 queries. users per second: 1282.29 PROGRESS: recommendations finished on 2000/2931 queries. users per second: 1277.46 Precision and recall summary statistics by cutoff +--------+-----------------+-----------------+ | cutoff | mean_precision | mean_recall | +--------+-----------------+-----------------+ | 1 | 0.185602183555 | 0.0580033551119 | | 2 | 0.155237120437 | 0.0913773449342 | | 3 | 0.13590355965 | 0.11735511541 | | 4 | 0.122824974411 | 0.137502018055 | | 5 | 0.111838962811 | 0.154550447846 | | 6 | 0.103093369726 | 0.170952566142 | | 7 | 0.096602817176 | 0.18653678385 | | 8 | 0.0893039918117 | 0.196434939449 | | 9 | 0.084082034952 | 0.206375680122 | | 10 | 0.0797679972706 | 0.21799005233 | +--------+-----------------+-----------------+ [10 rows x 3 columns] Model compare metric: precision_recall